#!/bin/bash

# Directory of the base checkpoint; handed to the trainer below as
# --model_name_or_path.
model_path='/data/nfs/nvme1/meichaoyang/models/Qwen-72B-Chat_1213'


# Weights & Biases configuration.
#
# SECURITY: a W&B API key used to be hard-coded at this spot. Credentials must
# not be stored in scripts; the previously exposed key should be revoked. If
# W&B logging is ever re-enabled, supply WANDB_API_KEY through the caller's
# environment (or `wandb login`) rather than writing it here.
#
# WANDB_DISABLED=true asks the Hugging Face Trainer integration to skip W&B
# reporting, so no key is required for this run.
export WANDB_DISABLED=true

# Shared storage root, and the Hugging Face home directory (HF_HOME) placed
# directly beneath it. Both are exported so child processes inherit them.
WORKSPACE=/cpfs01/shared/public
HF_HOME="${WORKSPACE}/huggingface"
export WORKSPACE HF_HOME


# Load conda's bash integration into this non-interactive shell, then activate
# the training environment by its absolute prefix path.
#
# The hook output is captured first because `eval "$(cmd)"` returns 0 even
# when cmd fails (eval of an empty string succeeds), which would hide a
# missing or broken conda installation.
conda_hook="$(conda shell.bash hook)" || {
  printf 'error: "conda shell.bash hook" failed; is conda on PATH?\n' >&2
  exit 1
}
eval "$conda_hook"

# Abort instead of launching training under whatever interpreter happens to be
# on PATH when the environment cannot be activated.
conda activate /data/nfs-ten1/nfs/meichaoyang001/envs/llama_240417_cuda11_8 || {
  printf 'error: failed to activate conda environment\n' >&2
  exit 1
}


# Launch full-parameter SFT via the DeepSpeed launcher, using the host list in
# /etc/mpi/hostfile and src/train.py as the entry point.
#
# The trainer flags live in an array so they can be grouped and commented, and
# so "$model_path" is passed as exactly one argument regardless of its content
# (the original left the expansion unquoted).
train_args=(
  # DeepSpeed configuration file handed to the trainer.
  --deepspeed conf/ds_stage3_config_qwen_optimizer_off.json

  # Task, base model, dataset and prompt template.
  --stage sft
  --model_name_or_path "$model_path"
  --do_train
  --dataset alpaca_gpt4_zh
  --template qwen
  --finetuning_type full

  # NOTE(review): the directory name says "qwen_7b" while model_path points at
  # a Qwen-72B checkpoint -- confirm the intended name before relying on it.
  --output_dir checkpoint/qwen_7b_chat_full_sft_alpaca_gpt4_zh_stage3_optimizer_off_bs128_lr1e-5

  # Data preprocessing.
  --overwrite_cache
  --preprocessing_num_workers 16

  # Batch size and learning-rate schedule.
  --per_device_train_batch_size 1
  --gradient_accumulation_steps 8
  --lr_scheduler_type cosine
  --logging_steps 10
  --warmup_steps 100

  # Checkpointing.
  --save_steps 2000
  --save_only_model

  # Optimisation length and reporting.
  --learning_rate 1e-5
  --num_train_epochs 3.0
  --plot_loss

  # Precision, attention implementation and sequence-length limits.
  --bf16
  --flash_attn fa2
  --cutoff_len 102400
  --ignore_len 10240
)

deepspeed --hostfile /etc/mpi/hostfile src/train.py "${train_args[@]}"